1. First Analysis

Preparing data

gastroc <-
  readr::read_tsv("./data/counts/readcount_genename_gastroc.xls",
                  show_col_types = F) %>%
  tibble::column_to_rownames(var = "gene_id")
counts.gastroc <- gastroc[, 1:12]

# TODO: fix annotations: several entries are incomplete whereas the gastroc seems to be complete
soleus <- readr::read_tsv("./data/counts/readcount_genename_soleus.xls", show_col_types = F) %>%
  tibble::column_to_rownames(var = "gene_id")
## Warning: One or more parsing issues, call `problems()` on your data frame for details, e.g.:
##   dat <- vroom(...)
##   problems(dat)
counts.soleus <- soleus[, 1:12]

Filtering

omitting genes with zero counts in more than 30% of the samples

most variable genes (top 1000) & Normalization

To obtain the most variable genes, a z-score will be applied per gene to then take the sd and filter for the top 1000

z-score:

\(\frac{x - mean(x)}{sd(x)}\)

zscore <- function(M) {
  s <- apply( M,1,sd )      # standard deviation
  µ <- apply( M, 1, mean )  # mean
  M.z <- (M - µ) / s        # zscore
  return(M.z)
}

# ' returning the raw counts of the most variable genes by applying zscore and sd
mostVariableRows <- function(M, ntop=1000) {
  M.z <- zscore(M)
  
  # ordering by sd descending
  sdev <- apply(M.z, 1, sd)
  M.top <- M[order(sdev, decreasing = T)[1:ntop] , ]
  return(M.top)
}

counts.gastroc.top <- mostVariableRows(counts.gastroc)
counts.soleus.top <- mostVariableRows(counts.soleus)

PCA

Here also taking the top 1000 most variable genes of each tissue and merging them.

# combine both count tables
counts.both <-
  merge(counts.gastroc.top,
        counts.soleus.top,
        by = 0,
        suffixes = c("_gastroc", "_soleus")) %>%
  tibble::column_to_rownames("Row.names")

pca <- prcomp(t(counts.both), scale. = T)

pca.data <- data.frame(Sample = rownames(pca$x),
                       X = pca$x[, 1],
                       Y = pca$x[, 2]) %>%
  mutate(type = substr(Sample, 1, 2),
         tissue = stringr::str_extract(Sample,"[:alpha:]+$"))

plt <-
  autoplot(
    pca,
    data = pca.data,
    colour = 'type',
    shape = 'tissue',
    label.show.legend = FALSE
  ) +
  ggtitle("PCA both tissues")+
  theme_bw()
ggsave(filename = file.path(path.currentPlots, "01_pca_both_tissues_filtered.svg"), plt)
plt

This plot looks different than before! The overall cluster sill exist, but slightly shifted and rotated. The PC1 has now 48% whereas it previously had 40% and PC2 26% vs 25%.

I thought I did not change anything and after another plotting run, the plot changed. Cleaning environment and workspace did not change anything.

2. DESeq

Volcano Plot

volcanoPlot <- function(result, se, pCutoff = 0.05, FCutoff = 1, tissue = character()) {
  gene_names <-
    rowData(se)[rownames(result), c("gene_name"), drop = F]
  results.df <- result %>%
    as.data.frame() %>%
    dplyr::arrange(padj)
  
  # top 10 gene labels for respectively up and down regulation
  labs.up <- results.df[results.df$log2FoldChange > FCutoff, ] %>%
    rownames() %>% .[1:10] %>% gene_names[., c("gene_name")]
  labs.down <- results.df[results.df$log2FoldChange < -FCutoff, ] %>%
    rownames() %>% .[1:10] %>% gene_names[., c("gene_name")]
  selectLab <- c(labs.up, labs.down, "Nfe2l1") %>% unique() # always including "Nfe2l1"
  
  # custom colors:
  keyvals <- ifelse(
    result$log2FoldChange < -FCutoff &
      result$padj < pCutoff,
    'royalblue',
    ifelse(
      result$log2FoldChange > FCutoff &
        result$padj < pCutoff,
      'red',
      'gray'
    )
  )
  keyvals[is.na(keyvals)] <- 'gray'
  names(keyvals)[keyvals == 'red'] <- 'up regulated'
  names(keyvals)[keyvals == 'gray'] <- 'nonsignificant'
  names(keyvals)[keyvals == 'royalblue'] <- 'down regulated'
  
  vlc.plt <- EnhancedVolcano(
    result,
    x = 'log2FoldChange',
    y = 'padj',
    title = 'WT vs KO: Nfe2l1 knockout',
    subtitle = ifelse(isEmpty(tissue), "", paste0('tissue: ', tissue)),
    caption = "",
    ylab = expression(paste(-Log[10], p[adj])),
    pCutoff = pCutoff,
    FCcutoff = FCutoff,
    legendPosition = 'right',
    pointSize = 2,
    colCustom = keyvals,
    lab = gene_names$gene_name,
    selectLab = selectLab,
    labSize = 3,
    boxedLabels = TRUE,
    drawConnectors = TRUE,
    max.overlaps = Inf
  )
  
  return(vlc.plt)
}


p <- volcanoPlot(res.gastroc, se.gastroc, pCutoff = pCutoff, FCutoff = FCutoff, tissue = "gastrocnemius")
ggsave(filename = file.path(path.currentPlots, "02_volcano_gastroc.svg"), p)
## Saving 9 x 9 in image
p

p <- volcanoPlot(res.soleus, se.soleus, pCutoff = pCutoff, FCutoff = FCutoff, tissue = "soleus")
ggsave(filename = file.path(path.currentPlots, "02_volcano_soleus.svg"), p)
## Saving 9 x 9 in image
p

scatter-plot: most differential genes, both tissues

using the Wald-test stat from the DESeq2 result and filtering on the set FCutoff=1 and pCutoff=0.01 yields the following plot:

apply_cutoffs <- function(deseq.result, colname="stat", FCutoff, pCutoff) {
  res.filtered <- deseq.result %>%
    data.frame() %>%
    filter(padj < pCutoff,
           log2FoldChange > FCutoff | log2FoldChange < -FCutoff) %>%
    dplyr::rename(!!colname := stat) %>%
    dplyr::select(!!colname)
  return(res.filtered)
}

gastroc_res.filtered <- apply_cutoffs(res.gastroc, colname="gastroc", FCutoff, pCutoff)
soleus_res.filtered  <- apply_cutoffs(res.soleus,  colname="soleus",  FCutoff, pCutoff)

gene_names <- rowData(se.gastroc) %>% as.data.frame() %>% 
  dplyr::select(gene_name)

# combining Wald-Test data from both tissues and ordering in quadrants
res.combined <- merge(gastroc_res.filtered,
                      soleus_res.filtered,
                      by = 0) %>%
  mutate(diff.exp = case_when(
    gastroc < 0 & soleus < 0 ~ "both down",
    gastroc > 0 & soleus > 0 ~ "both up",
    gastroc < 0 & soleus > 0 ~ "ga down, sol up",
    gastroc > 0 & soleus < 0 ~ "ga up, sol down",
    TRUE ~ "different"
  )) %>% 
  merge(gene_names, by.x="Row.names", by.y=0)
 
# removing all gene_names except the top_n_genes (sum of absolute Wald-test numbers)
top_n_genes <- 10
top_labels <- res.combined %>%
  group_by(diff.exp) %>% 
  arrange(desc(abs(gastroc) + abs(soleus))) %>%
  filter(row_number() %in% c(1:top_n_genes)) %>% 
  ungroup() %>% 
  .$gene_name

res.combined <- res.combined %>% 
  mutate(gene_name = ifelse(gene_name %in% top_labels, gene_name, ""))

# final plot
p <- ggplot(res.combined, aes(x = gastroc, y = soleus, label = gene_name)) +
  geom_vline(xintercept = 0) + 
  geom_hline(yintercept = 0) + 
  geom_point(aes(color = diff.exp)) +
  # scale_color_manual(values = c("red", "chartreuse1", "bisque", "royalblue")) +
  labs(x = "gastroc", y = "soleus") +
  ggrepel::geom_label_repel(max.overlaps = 20) + 
  ggtitle(label = "stat (Wald test)") +
  theme_bw()

ggsave(filename = file.path(path.currentPlots, "02_dea_scatter.svg"), p)
## Saving 9 x 9 in image
p

barplot

p <- ggplot(res.combined, aes(x = diff.exp)) +
  geom_bar(aes(fill = diff.exp)) +
  theme_bw()
ggsave(filename = file.path(path.currentPlots, "02_dea_scatter_barplot.svg"), p)
## Saving 7 x 5 in image
p

Venn/Euler-Diagram

col_palette <- RColorBrewer::brewer.pal(8, "Dark2")
# venn.colors <- c(gastroc = "#FFB08E", soleus = "#FF6638", col_palette[1:4])
venn.colors <- c(tissue_pal, scales::hue_pal()(4))

# only the two tissues
gene_sets <- c(
  "gastroc" = sign_gene_stats$gastroc - sign_gene_stats$shared_sig_genes,
  "soleus" = sign_gene_stats$soleus - sign_gene_stats$shared_sig_genes,
  "gastroc&soleus" = sign_gene_stats$shared_sig_genes
)

# 1st option: euler two tissues
p <- plot(
  euler(gene_sets),
  quantities = T,
  legend = list(side = "right"),
  fills = venn.colors,
  main = "significant genes"
)
ggsave(filename = file.path(path.currentPlots, "02_euler.svg"), p)
## Saving 7 x 5 in image
p

# 2nd option: venn two tissues
library(eulerr)
p <- plot(
  eulerr::venn(gene_sets),
  fills = tissue_pal,
  main = "significant genes"
)
ggsave(filename = file.path(path.currentPlots, "02_venn.svg"), p)
## Saving 7 x 5 in image
p

Heatmap

library(ComplexHeatmap)
library(RColorBrewer)

zscore <- function(M) {
  s <- apply( M,1,sd )      # standard deviation
  µ <- apply( M, 1, mean )  # mean
  M.z <- (M - µ) / s        # zscore
  return(M.z)
}

sign_genes <-
  unique(c(
    row.names(gastroc_res.filtered),
    row.names(soleus_res.filtered)
  )) 
sign_genes <-
  sign_genes[sign_genes %in% rownames(counts.gastroc) &
               sign_genes %in% rownames(counts.soleus)]

counts_sign_zscored <- merge(
  zscore(counts.gastroc[sign_genes,]),
  zscore(counts.soleus[sign_genes,]),
  by = 0,
  suffixes = c(".ga", ".sol")
) %>% 
  tibble::column_to_rownames(var="Row.names") %>%
  as.matrix()


plotCHM <- function(counts_sign) {
  mypalette <- brewer.pal(11, "RdYlBu")
  # (reverse to map style of other heatmap in manuscript)
  morecols <- colorRampPalette(rev(mypalette))
  
  tissue_vec <- c(rep("gastroc", 12), rep("soleus", 12))
  condition_vec <- c(rep(c(rep("WT", 6), rep("KO", 6)),2))
  top_annot <-
    HeatmapAnnotation(
      tissue = tissue_vec,
      condition = condition_vec,
      col = list(tissue = tissue_pal, condition = c("WT" = "white", "KO" = "gray")),
      gp = gpar(col = "darkgray"),
      show_legend = FALSE
    )
  
  chm <- Heatmap(
    counts_sign,
    row_title = "genes",
    name = "zscore",
    show_row_names = FALSE,
    show_column_names = FALSE,
    column_title = "samples",
    col = rev(morecols(50)),
    column_title_side = "bottom",
    top_annotation = top_annot
  )
  
  # creating custom annotation legend (to obtain the gray border)
  condition_legend = Legend(
    labels = c("WT", "KO"),
    legend_gp = gpar(fill = c("white", "gray")),
    border = "darkgray",
    title = "condition"
  )
  tissue_legend = Legend(
    labels = c("gastroc", "soleus"),
    legend_gp = gpar(fill = c("orange", "purple")),
    border = "darkgray",
    title = "tissue"
  )
  legend_list <- list(condition_legend, tissue_legend)
  
  draw(chm, annotation_legend_list = legend_list)
}

svglite::svglite(
  file.path(path.currentPlots, "03_heatmap_sign_genes.svg"),
  width = 6,
  height = 6
)
plotCHM(counts_sign_zscored)
dev.off()
## quartz_off_screen 
##                 2

pca (z-scored)

Here the z-scored count matrix is used to create the pca plot

pca <- prcomp(t(counts_sign_zscored), scale. = T)

pca.data <- data.frame(Sample = rownames(pca$x),
                     X = pca$x[, 1],
                     Y = pca$x[, 2]) %>%
mutate(type = substr(Sample, 1, 2),
       tissue = stringr::str_extract(Sample,"[:alpha:]+$"))

p <-
autoplot(
  pca,
  data = pca.data,
  colour = 'type',
  shape = 'tissue',
  label.show.legend = FALSE
) +
  ggtitle("PCA z-scored sign. genes") +
  theme_bw()
ggsave(filename = file.path(path.currentPlots, "03_pca_dea_sign_zscored.svg"), p)
p

pca (counts)

gathering the original count matrices of both tissues for the significant genes, which were shown in the heatmap.

counts_sign <-
  merge(counts.gastroc[sign_genes,],
        counts.soleus[sign_genes,],
        by = 0,
        suffixes = c("_gastroc", "_soleus")) %>%
  tibble::column_to_rownames("Row.names")

pca <- prcomp(t(counts_sign), scale. = T)

pca.data <- data.frame(Sample = rownames(pca$x),
                     X = pca$x[, 1],
                     Y = pca$x[, 2]) %>%
mutate(type = substr(Sample, 1, 2),
       tissue = stringr::str_extract(Sample,"[:alpha:]+$"))

p <-
autoplot(
  pca,
  data = pca.data,
  colour = 'type',
  shape = 'tissue',
  label.show.legend = FALSE
) +
  ggtitle("PCA sign. genes") +
  theme_bw()
ggsave(filename = file.path(path.currentPlots, "03_pca_dea_sign_counts.svg"), p)
## Saving 7 x 5 in image
p

3. fgsea

ranks

getRanks <- function(res, annot) {
  # only taking genes which have entrezgene_ids assigned to them
  genes_with_entrez <- select(annot, GeneID, entrezgene_id) %>% 
    filter(!is.na(entrezgene_id))
  
  ranks <- as.data.frame(res) %>%
    tibble::rownames_to_column("GeneID") %>%
    merge(genes_with_entrez, by = "GeneID") %>%
    arrange(desc(stat)) %>% 
    select(entrezgene_id, stat) %>% 
    tibble::deframe() # creating a named num from two columns
  return(ranks)
}

ranks.gastroc <- getRanks(res.gastroc, annot)
ranks.soleus <- getRanks(res.soleus, annot)
# TODO: why (again) is the soleus gene count seemingly 300 below soleus gene count / ranks count
# -> seems because of e.g. the cutoff at DESeq

applying fgsea

fgseaRes <- fgsea(
  pathways = CGP,
  stats    = ranks,
  minSize  = 15,
  maxSize  = 200
)

most differential regulated pathways, both tissues

using NES from the fgsea result filtering on the set `pCutoff=`0.01 yields the following plot:

pCutoff <- params$pCutoff
fgseaRes.combined <- merge(
  data.frame(fgseaRes.gastroc[, c("pathway", "NES", "padj")]),
  data.frame(fgseaRes.soleus[, c("pathway", "NES", "padj")]),
  by = "pathway",
  suffixes = c(".ga", ".sol")
) %>%
  filter(padj.ga < pCutoff | padj.sol < pCutoff) %>%
  mutate(
    diff.exp = case_when(
      NES.ga  < 0 & NES.sol < 0 & padj.ga < pCutoff & padj.sol < pCutoff ~ "both down",
      NES.ga  > 0 & NES.sol > 0 & padj.ga < pCutoff & padj.sol < pCutoff ~ "both up",
      NES.ga  < 0 & NES.sol > 0 & padj.ga < pCutoff & padj.sol < pCutoff ~ "ga down, sol up",
      NES.ga  > 0 & NES.sol < 0 & padj.ga < pCutoff & padj.sol < pCutoff ~ "ga up, sol down",
                                  padj.ga < pCutoff & padj.sol > pCutoff ~ "only gastroc",
      # NES.ga  > 0 &               padj.ga < pCutoff & padj.sol > pCutoff ~ "ga up",
                                  padj.ga > pCutoff & padj.sol < pCutoff ~ "only soleus",
      # NES.sol > 0 &               padj.ga > pCutoff & padj.sol < pCutoff ~ "sol up",
      TRUE ~ "different"
    )
  )

# final plot
p <- ggplot(fgseaRes.combined, aes(x = NES.ga, y = NES.sol, text=pathway)) +
  geom_vline(xintercept = 0) + 
  geom_hline(yintercept = 0) + 
  geom_point(aes(color = diff.exp)) +
  # scale_color_manual(values = c("red", "chartreuse1", "bisque", "royalblue")) +
  labs(x = "gastroc", y = "soleus") +
  # ggrepel::geom_label_repel(max.overlaps = 20) + 
  ggtitle(label = "NES") +
  theme_bw()

ggsave(filename = file.path(path.currentPlots, "03_gsea_scatter.svg"), p)
## Saving 7 x 5 in image
p

# interactive:
# plotly::ggplotly(p, tooltip = "all")

barplot

p <- ggplot(fgseaRes.combined, aes(x = diff.exp)) +
  geom_bar(aes(fill = diff.exp)) +
  theme_bw()

ggsave(filename = file.path(path.currentPlots, "03_gsea_scatter_barplot.svg"), p)
## Saving 7 x 5 in image
p

Venn/Euler-Diagram

col_palette <- RColorBrewer::brewer.pal(8, "Dark2")
# venn.colors <- c(gastroc = "#FFB08E", soleus = "#FF6638", col_palette[1:4])
venn.colors <- c(tissue_pal, scales::hue_pal()(4))

sign_geneSets_gastroc <-
  data.frame(fgseaRes.gastroc[, c("pathway", "padj")]) %>%
  filter(padj < pCutoff) %>% 
  pull(pathway)


sign_geneSets_soleus <-
  data.frame(fgseaRes.soleus[, c("pathway", "padj")]) %>%
  filter(padj < pCutoff) %>% 
  pull(pathway)

# only the two tissues
gene_sets <- list(
  "gastroc" = sign_geneSets_gastroc,
  "soleus" = sign_geneSets_soleus#,
  # "gastroc&soleus" = sign_gene_stats$shared_sig_genes
)

# 1st option: euler two tissues
p <- plot(
  euler(gene_sets),
  quantities = T,
  legend = list(side = "right"),
  fills = venn.colors,
  main = "significant gene sets (GSEA)"
)
ggsave(filename = file.path(path.currentPlots, "03_euler.svg"), p)
## Saving 7 x 5 in image
p

# 2nd option: venn two tissues
library(eulerr)
p <- plot(
  eulerr::venn(gene_sets),
  fills = tissue_pal,
  main = "significant gene sets (GSEA)"
)
ggsave(filename = file.path(path.currentPlots, "03_venn.svg"), p)
## Saving 7 x 5 in image
p

dotplot

dotplot: “both upregulated”

only 7 gene sets => fits well in the plot

group = "both up"
p_combined <- combined_dotplot(group, fgseaRes.combined)

ggsave(filename = file.path(path.currentPlots, paste0("03_gsea_dotplot_", group, ".svg")),
       p_combined)
## Saving 7 x 5 in image
p_combined

dotplot: “both down”

gets adjusted by the function well. The .svg file looks okay

group = "both down"
p_combined <- combined_dotplot(group, fgseaRes.combined)

ggsave(filename = file.path(path.currentPlots, paste0("03_gsea_dotplot_", group, ".svg")),
       p_combined)
## Saving 7 x 5 in image
p_combined

dotplot: “only soleus”

gets adjusted by the function well. The .svg file looks okay

group = "only soleus"
p_combined <- combined_dotplot(group, fgseaRes.combined, max_label_length = 100)

ggsave(filename = file.path(path.currentPlots, paste0("03_gsea_dotplot_", group, ".svg")),
       p_combined)
## Saving 7 x 5 in image
p_combined

dotplot: top20